library(tidyverse) 
library(nls.multstart) 
library(broom)
freenome_colors <- c('#948DFF', '#1DE3FE', '#BBD532', '#FF9D42',  '#FC2E7B', 
                   '#FECDD1')

Import Data

panel_1 <- readxl::read_xlsx(here::here("data", "reproducibility-study", "raw", 
                             "betalot_reproducibility_dataframes.xlsx"), 
                  sheet = 1) %>% 
  janitor::clean_names()

panel_2 <- readxl::read_xlsx(here::here("data", "reproducibility-study", "raw", 
                             "betalot_reproducibility_dataframes.xlsx"), 
                  sheet = 2) %>% 
  janitor::clean_names()


standards_panel_1 <- panel_1 %>% 
  filter(sample_type == "standard") 

standards_panel_2 <- panel_2 %>% 
  filter(sample_type == "standard")

From the analysis here we see that plate 3 on panel 1 is the best plate in terms of bead counts and plate 2 on panel 1 is the best.

We will choose these 2 plates to work with for our simulation set up

standards_panel_1 <- panel_1 %>% 
  filter(sample_type == "standard") %>% 
  filter(plate_number == "Plate 3")
standards_panel_2 <- panel_2 %>% 
  filter(sample_type == "standard") %>% 
  filter(plate_number == "Plate 2")

combined_standards <- bind_rows(standards_panel_1, standards_panel_2)
rc_panel1 <- panel_1 %>% 
  filter(plate_number == "Plate 3") %>% 
  filter(sample_type == "sample") %>% 
  filter(xponent_id != "Assay Buffer")

rc_panel2 <- panel_2 %>% 
  filter(plate_number == "Plate 2") %>% 
  filter(sample_type == "sample") %>% 
  filter(xponent_id != "Assay Buffer")

combined_rc <- bind_rows(rc_panel1, rc_panel2)
  1. 5PL curve fit
logistic_5pl <- function(d, a, median_mfi, c, b, g) {
  return(d + (a-d)/(a + (median_mfi/c)^b)^g)
}
  1. Pick one set of standards to work with to test the 5PL fit
  1. Fit Model
afp_tmp <- nls.multstart::nls_multstart(standard_expected_concentration ~ 
                                          logistic_5pl(d, a, median_mfi = median_mfi, 
                                                       c, b, g), 
                                        data = afp_std, 
                                        iter = 10000, 
                                        modelweights = 1/standard_expected_concentration^2,
                                        start_lower = c(a = -1, b = -1, c = 100, d = -1, 
                                                        g = -1), 
                                        start_upper = c(a = 1, b = 1, c = 200, d = 1, 
                                                        g = 1), 
                                        supp_errors = "Y", 
                                        control = nls.control(maxiter = 10000, 
                                                              minFactor=1e-7, 
                                                              tol=1e-5, 
                                                              printEval=F, 
                                                              warnOnly=F))

afp_tmp
Nonlinear regression model
  model: standard_expected_concentration ~ logistic_5pl(d, a, median_mfi = median_mfi,     c, b, g)
   data: data
         d          a          c          b          g 
-32.970879  -0.001301 125.968679  -1.101397   0.880800 
 weighted residual sum-of-squares: 1.418

Number of iterations to convergence: 41 
Achieved convergence tolerance: 1.49e-08
  1. Check agreement between xponent and nls
## xponent vs R fit 
broom::augment(afp_tmp, newdata = afp_std) %>% 
  ggplot(.,aes(log(calc_conc), log(.fitted))) + 
  geom_point() + 
  theme_classic() + 
  labs(title = "xponent vs R fit", 
       x = "xponent", y = "R fit")

## Absolute difference in fitted R vs fitted xponent 
broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(diff = abs(calc_conc - .fitted)) %>% 
  ggplot(aes(diff)) + 
  geom_histogram() + 
  theme_classic() + 
  labs(title = paste0("Absolute Difference in xponent fit vs R fit (", 
                      afp_std$conc_units[1], ")"))
  

broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(diff = abs(calc_conc - .fitted)) %>% 
  filter(diff > 1000)

## RECOVERY  
broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(recov_nls = 100*(.fitted)/standard_expected_concentration) %>% 
  ggplot(.,aes(pct_recovery, recov_nls)) + 
  geom_point() + 
  theme_classic() + 
  labs(y = "custom R fit", x = "xponent fit", title = "% Recovery") + 
  scale_x_continuous(breaks = seq(88, 120, by = 2)) + 
  scale_y_continuous(breaks = seq(88, 120, by = 2))

broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(recov_nls = 100*(.fitted)/standard_expected_concentration) %>% 
  mutate(recov_diff = abs(recov_nls - pct_recovery)) %>% 
  ggplot(.,aes(recov_diff)) + 
  geom_histogram() + 
  theme_classic() + 
  labs(title = paste0("Absolute Difference in xponent fit vs R fit (% Recovery)"))


broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(recov_nls = 100*(.fitted)/standard_expected_concentration) %>% 
  mutate(recov_diff = abs(recov_nls - pct_recovery)) %>% 
  filter(recov_diff > 2)

Standard Curve Replicates

Now that we see a reasonably good agreement between the two curves, we can proceed with our simulation study

Simulation set up
1. We have a tibble/data frame
2. Randomly select n well columns for each assay
3. Fit the curve on that data
4. Check if it has converged 4. Calculate expected recovery for each standard
5. Calculate the concentration of the run controls using this fit

Things I need to

  1. Function that fits the model
fit_5pl <- function(df = NULL){
  nls.multstart::nls_multstart(standard_expected_concentration ~ 
                                          logistic_5pl(d, a, median_mfi = median_mfi, 
                                                       c, b, g), 
                                        data = df, 
                                        iter = 100000, 
                                        modelweights = 1/standard_expected_concentration^2,
                                        start_lower = c(a = -1, b = -1, c = 100, d = -1, 
                                                        g = -1), 
                                        start_upper = c(a = 1, b = 1, c = 200, d = 1, 
                                                        g = 1), 
                                        supp_errors = "Y", 
                                        control = nls.control(maxiter = 100000, 
                                                              minFactor=1e-7, 
                                                              tol=1e-5, 
                                                              printEval=F, 
                                                              warnOnly=F))
}

Reference standard concentrations

We also need a column that indicates the concentration using all 24 replicates of the plate (currently, luminex only calculates the conc for 8 replicates)

Test Chunk/Playground

temp_df <- combn(24, 2) %>%
    as.matrix() %>%
    data.frame() %>%
    select(sample(1:ncol(.), 1)) %>%
    rename(well_column = 1) %>%
    inner_join(., new_combined) %>% 
    filter(assay == "AFP") %>% 
    group_by(xponent_id) %>% 
    mutate(cv = 100*sd(median_mfi)/mean(median_mfi)) %>% 
    ungroup()
  
  tmp_columns <- temp_df %>% 
    select(well_column) %>% 
    distinct(well_column)
  
  tmp_model <- fit_5pl(df = temp_df)
  conv_log <- broom::glance(tmp_model) %>%
              select(isConv) %>%
              pull()
  
  #### Here we add RCs to the sampled_df 
  rc <- rc_calculated %>% 
    filter(assay == "AFP") %>%
    inner_join(tmp_columns)
  
  temp_df <- bind_rows(temp_df, rc)

  ## this will have backcalculated std concentrations and RCs 
  augmented_df <- broom::augment(tmp_model, newdata = temp_df) %>% 
    mutate(recov_sim = 100*(.fitted)/standard_expected_concentration, 
           num_reps_used = rep(2, nrow(.)), 
           conv = rep(as.character(conv_log), nrow(.)))
  augmented_df %>% 
    filter(sample_type == "sample") %>% 
    ggplot(.,aes(calc_conc, nls_conc)) + 
    geom_point()
  
  augmented_df %>% 
    filter(sample_type == "standard") %>% 
    ggplot(.,aes(recov_sim, pct_recovery)) + 
    geom_point()   
  
  1. Function to create what we need
iterate_model <- function(replicates = NULL,
                          protein = "NULL") {
  
  sampled_df <- combn(24, replicates) %>%
    as.matrix() %>%
    data.frame() %>%
    select(sample(1:ncol(.), 1)) %>%
    rename(well_column = 1) %>%
    inner_join(., new_combined) %>% 
    filter(assay == as.character(protein)) %>% 
    group_by(xponent_id) %>% 
    mutate(cv = 100*sd(median_mfi)/mean(median_mfi)) %>% 
    ungroup()
  
  well_columns <- sampled_df %>% 
    select(well_column) %>% 
    distinct(well_column)
  
  sampled_model <- fit_5pl(df = sampled_df)
  conv_log <- broom::glance(sampled_model) %>%
              select(isConv) %>%
              pull()
  
  #### Here we add RCs to the sampled_df 
  rc <- rc_calculated %>% 
    filter(assay == as.character(protein)) %>%
    inner_join(well_columns)
  
  sampled_df <- bind_rows(sampled_df, rc)

  ## this will have backcalculated std concentrations and RCs 
  augmented_df <- broom::augment(sampled_model, newdata = sampled_df) %>% 
    mutate(recov_sim = 100*(.fitted)/standard_expected_concentration, 
           num_reps_used = rep(replicates, nrow(.)), 
           conv = rep(as.character(conv_log), nrow(.)))
  
  return(augmented_df)
  
  
}

Now we need a way of iterating over all proteins

list_of_proteins <- combined_standards %>% select(assay) %>% 
  distinct() %>% 
  pull()
reps_2 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 2, 
                                                   protein = .x)), 
                .id = "sim_number")

reps_3 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 3, 
                                                   protein = .x)), 
                .id = "sim_number")

reps_4 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 4, 
                                                   protein = .x)), 
                .id = "sim_number")

reps_5 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 5, 
                                                   protein = .x)), 
                .id = "sim_number")

Bind these dataframes

sim_combined <- bind_rows(reps_2, reps_3, reps_4, reps_5)

sim_combined <- sim_combined %>% 
  mutate(nls_recov = 100*nls_conc/standard_expected_concentration)

reps_3 %>% 
  filter(assay == "AFP") %>% 
  filter(xponent_id == "Standard1")

Distribution of CVs

sim_combined %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(cv, fill = num_reps_used)) + 
  geom_histogram(alpha = 0.5) + 
  theme_classic() + 
  labs(title = "Distribution of CVs") + 
  scale_color_manual(values = freenome_colors[1:4])
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 33600 rows containing non-finite values (stat_bin).

Recovery Absolute

sim_recov_plot <- function(std_point = "NULL") {
  sim_combined %>% 
  filter(xponent_id == std_point) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(num_reps_used, recov_sim, group = num_reps_used)) + 
  geom_point() + 
  geom_boxplot(outlier.shape = NA) + 
  facet_wrap(vars(assay)) + 
  theme_classic() + 
  labs(title = paste0("Percent Recovery For ", std_point), 
       subtitle = "Recoveries from simulations")
}
map(list_of_standards, ~ sim_recov_plot(std_point = .x))
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

Distribution of the differences in Recovery

map(list_of_standards, ~ sim_recov_diff_plot(std_point = .x))
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

Run Controls

map(list_of_proteins, ~ rc_plot_avg(protein = .x))
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

[[7]]

[[8]]

[[9]]

[[10]]
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning: Removed 3 rows containing missing values (geom_point).

[[11]]
Warning: Removed 41 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).

[[12]]

[[13]]

[[14]]

[[15]]
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning: Removed 8 rows containing missing values (geom_point).

[[16]]

[[17]]

[[18]]

[[19]]

[[20]]

Delete a standard

Replicates 4

Replicates 3

  1. For each protein, fit a model using all 24 standards and fix it
  2. Then for each protein, sample a set of RCs and predict their concentrations based on the model.

Fix standard, change number of replicates in the RCs

models %>%
  purrr::pluck("AFP")
Nonlinear regression model
  model: standard_expected_concentration ~ logistic_5pl(d, a, median_mfi = median_mfi,     c, b, g)
   data: data
         d          a          c          b          g 
-32.970554  -0.001301 125.968188  -1.101407   0.880794 
 weighted residual sum-of-squares: 1.418

Number of iterations to convergence: 50 
Achieved convergence tolerance: 1.49e-08
iterate_model_rc <- function(replicates = NULL,
                          protein = "NULL") {
  
  sampled_df <- combn(24, replicates) %>%
    as.matrix() %>%
    data.frame() %>%
    select(sample(1:ncol(.), 1)) %>%
    rename(well_column = 1) %>%
    inner_join(., new_combined) %>% 
    filter(assay == as.character(protein)) %>% 
    group_by(xponent_id) %>% 
    mutate(cv = 100*sd(median_mfi)/mean(median_mfi)) %>% 
    ungroup()
  
  well_columns <- sampled_df %>% 
    select(well_column) %>% 
    distinct(well_column)
  
   #fit_mod <- fit_5pl(df = standard_df)   
   #standard_df <- broom::augment(fit_mod, standard_df) %>% 
    #rename(nls_conc = .fitted)  
  
   
   rc <- rc_calculated %>% 
    filter(assay == as.character(protein)) %>%
    inner_join(well_columns)
  
  #sampled_df <- bind_rows(sampled_df, rc)

  ## this will have backcalculated Cs 
  fit_mod <- models %>% pluck(protein) 
  augmented_df <- broom::augment(fit_mod, newdata = rc) %>% 
    mutate(num_reps_used = rep(replicates, nrow(.)))
  
  return(augmented_df)
}
rc_reps_2 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 2, 
                                                   protein = .x)), 
                .id = "sim_number")

rc_reps_3 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 3, 
                                                   protein = .x)), 
                .id = "sim_number")

rc_reps_4 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 4, 
                                                   protein = .x)), 
                .id = "sim_number")

rc_reps_5 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 5, 
                                                   protein = .x)), 
                .id = "sim_number")
rc_sim_combined <- bind_rows(rc_reps_2, rc_reps_3, rc_reps_4, rc_reps_5)  
rc_sim_combined
map(list_of_proteins, ~ rc_plot_avg(protein = .x))
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'xponent_id', 'sim_number', 'assay'. You can override using the `.groups` argument.
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

[[7]]

[[8]]

[[9]]

[[10]]

[[11]]
Warning: Removed 181 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_point).

[[12]]

[[13]]

[[14]]

[[15]]

[[16]]

[[17]]

[[18]]

[[19]]

[[20]]

rc_cv_plot <- function(protein = "NULL") {
  rc_sim_combined %>% 
  group_by(assay, num_reps_used, xponent_id) %>% 
  summarise(avg = mean(log(.fitted + 1), na.rm = TRUE), 
            std_dev = sd(log(.fitted + 1), na.rm = TRUE), 
            cv = 100*(std_dev)/avg) %>% 
  mutate(xponent_id = fct_reorder(xponent_id, cv)) %>% 
  ungroup() %>% 
  filter(assay == protein) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(xponent_id, cv)) + 
  geom_point(aes(color = num_reps_used)) + 
    labs(title = paste0(protein)) + 
    theme_classic() 
}
map(list_of_proteins, ~ rc_cv_plot(protein = .x)) 
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
Warning in log(.fitted + 1) : NaNs produced
`summarise()` has grouped output by 'assay', 'num_reps_used'. You can override using the `.groups` argument.
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

[[7]]

[[8]]

[[9]]

[[10]]

[[11]]

[[12]]

[[13]]

[[14]]

[[15]]

[[16]]

[[17]]

[[18]]

[[19]]

[[20]]

---
title: "Standard Curve Simulations"
output: html_notebook
---


```{r}
library(tidyverse) 
library(nls.multstart) 
library(broom)
freenome_colors <- c('#948DFF', '#1DE3FE', '#BBD532', '#FF9D42',  '#FC2E7B', 
                   '#FECDD1')
```


### Import Data  

```{r}
panel_1 <- readxl::read_xlsx(here::here("data", "reproducibility-study", "raw", 
                             "betalot_reproducibility_dataframes.xlsx"), 
                  sheet = 1) %>% 
  janitor::clean_names()

panel_2 <- readxl::read_xlsx(here::here("data", "reproducibility-study", "raw", 
                             "betalot_reproducibility_dataframes.xlsx"), 
                  sheet = 2) %>% 
  janitor::clean_names()


standards_panel_1 <- panel_1 %>% 
  filter(sample_type == "standard") 

standards_panel_2 <- panel_2 %>% 
  filter(sample_type == "standard")
```


From the analysis [here](https://docs.google.com/presentation/d/1Eqf1dTTdI_x4IWgHQ5OkuNlQVo-qRinkPVnWa_XNwTI/edit#slide=id.gfd290a948c_0_9) we see that plate 3 on panel 1 is the best plate in terms of bead counts and plate 2 on panel 1 is the best. 

We will choose these 2 plates to work with for our simulation set up  

```{r}
standards_panel_1 <- panel_1 %>% 
  filter(sample_type == "standard") %>% 
  filter(plate_number == "Plate 3")
standards_panel_2 <- panel_2 %>% 
  filter(sample_type == "standard") %>% 
  filter(plate_number == "Plate 2")

combined_standards <- bind_rows(standards_panel_1, standards_panel_2)
```


```{r}
rc_panel1 <- panel_1 %>% 
  filter(plate_number == "Plate 3") %>% 
  filter(sample_type == "sample") %>% 
  filter(xponent_id != "Assay Buffer")

rc_panel2 <- panel_2 %>% 
  filter(plate_number == "Plate 2") %>% 
  filter(sample_type == "sample") %>% 
  filter(xponent_id != "Assay Buffer")

combined_rc <- bind_rows(rc_panel1, rc_panel2)
```





1. 5PL curve fit  


```{r}
logistic_5pl <- function(d, a, median_mfi, c, b, g) {
  return(d + (a-d)/(a + (median_mfi/c)^b)^g)
}
```

2. Pick one set of standards to work with  to test the 5PL fit  

```{r}
afp_std <- standards_panel_1 %>% 
  filter(assay == "AFP") %>% 
  filter(well_column == 1 | well_column == 2 | well_column == 3 | well_column == 4 
         | well_column == 5 | well_column == 6 | well_column == 7 | well_column == 8)
```


3. Fit Model 

```{r}
afp_tmp <- nls.multstart::nls_multstart(standard_expected_concentration ~ 
                                          logistic_5pl(d, a, median_mfi = median_mfi, 
                                                       c, b, g), 
                                        data = afp_std, 
                                        iter = 10000, 
                                        modelweights = 1/standard_expected_concentration^2,
                                        start_lower = c(a = -1, b = -1, c = 100, d = -1, 
                                                        g = -1), 
                                        start_upper = c(a = 1, b = 1, c = 200, d = 1, 
                                                        g = 1), 
                                        supp_errors = "Y", 
                                        control = nls.control(maxiter = 10000, 
                                                              minFactor=1e-7, 
                                                              tol=1e-5, 
                                                              printEval=F, 
                                                              warnOnly=F))

afp_tmp



```

4. Check agreement between xponent and nls  

```{r}
## xponent vs R fit 
broom::augment(afp_tmp, newdata = afp_std) %>% 
  ggplot(.,aes(log(calc_conc), log(.fitted))) + 
  geom_point() + 
  theme_classic() + 
  labs(title = "xponent vs R fit", 
       x = "xponent", y = "R fit")

## Absolute difference in fitted R vs fitted xponent 
broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(diff = abs(calc_conc - .fitted)) %>% 
  ggplot(aes(diff)) + 
  geom_histogram() + 
  theme_classic() + 
  labs(title = paste0("Absolute Difference in xponent fit vs R fit (", 
                      afp_std$conc_units[1], ")"))
  

broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(diff = abs(calc_conc - .fitted)) %>% 
  filter(diff > 1000)

## RECOVERY  
broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(recov_nls = 100*(.fitted)/standard_expected_concentration) %>% 
  ggplot(.,aes(pct_recovery, recov_nls)) + 
  geom_point() + 
  theme_classic() + 
  labs(y = "custom R fit", x = "xponent fit", title = "% Recovery") + 
  scale_x_continuous(breaks = seq(88, 120, by = 2)) + 
  scale_y_continuous(breaks = seq(88, 120, by = 2))

broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(recov_nls = 100*(.fitted)/standard_expected_concentration) %>% 
  mutate(recov_diff = abs(recov_nls - pct_recovery)) %>% 
  ggplot(.,aes(recov_diff)) + 
  geom_histogram() + 
  theme_classic() + 
  labs(title = paste0("Absolute Difference in xponent fit vs R fit (% Recovery)"))


broom::augment(afp_tmp, newdata = afp_std) %>% 
  mutate(recov_nls = 100*(.fitted)/standard_expected_concentration) %>% 
  mutate(recov_diff = abs(recov_nls - pct_recovery)) %>% 
  filter(recov_diff > 2)
```




### Standard Curve Replicates   

Now that we see a reasonably good agreement between the two curves, we can proceed with our simulation study   


Simulation set up  
1. We have a tibble/data frame   
2. Randomly select n well columns for each assay   
3. Fit the curve on that data   
4. Check if it has converged 
4. Calculate expected recovery for each standard   
5. Calculate the concentration of the run controls using this fit  







Things I need to 

1. Function that fits the model  

```{r}
fit_5pl <- function(df = NULL){
  nls.multstart::nls_multstart(standard_expected_concentration ~ 
                                          logistic_5pl(d, a, median_mfi = median_mfi, 
                                                       c, b, g), 
                                        data = df, 
                                        iter = 100000, 
                                        modelweights = 1/standard_expected_concentration^2,
                                        start_lower = c(a = -1, b = -1, c = 100, d = -1, 
                                                        g = -1), 
                                        start_upper = c(a = 1, b = 1, c = 200, d = 1, 
                                                        g = 1), 
                                        supp_errors = "Y", 
                                        control = nls.control(maxiter = 100000, 
                                                              minFactor=1e-7, 
                                                              tol=1e-5, 
                                                              printEval=F, 
                                                              warnOnly=F))
}
```


### Reference standard concentrations  

We also need a column that indicates the concentration using all 24 replicates of the plate (currently, luminex only calculates the conc for 8 replicates) 

```{r}
############### Standard Curves ################
calc_nls_conc <- function(protein = "NULL") {
  df_mod <- combined_standards %>% 
    filter(assay == protein)
  
  fit_mod <- fit_5pl(df = df_mod) 
  df_mod <- broom::augment(fit_mod, df_mod) %>% 
    rename(nls_conc = .fitted)
  return(df_mod)
} 

list_of_proteins <- combined_standards %>% select(assay) %>% 
  distinct() %>% 
  pull()

new_combined <- map_dfr(list_of_proteins, ~ calc_nls_conc(protein = .x))

new_combined <- new_combined %>% 
  mutate(nls_recov = 100*nls_conc/standard_expected_concentration)



############ Run Controls #################
calc_rc_conc <- function(protein = "NULL") {
  
  df_mod <- combined_standards %>% 
    filter(assay == protein)
  
  rc <- combined_rc %>% 
    filter(assay == protein) 
  
  fit_mod <- fit_5pl(df = df_mod) 
  
  rc <- broom::augment(fit_mod, newdata = rc) %>% 
    rename(nls_conc = .fitted)
  return(rc)
} 

rc_calculated <- map_dfr(list_of_proteins, ~ calc_rc_conc(protein = .x))

rc_calculated %>% 
    ggplot(.,aes(calc_conc, nls_conc)) + 
   geom_point() + 
   facet_wrap(vars(assay)) + 
   scale_x_log10() + scale_y_log10()

rc_calculated %>% 
  filter(sample_type == "sample") %>% 
  group_by(xponent_id, assay) %>% 
  summarise(nls_conc = mean(nls_conc))
```




**Test Chunk/Playground**   

```{r}
temp_df <- combn(24, 2) %>%
    as.matrix() %>%
    data.frame() %>%
    select(sample(1:ncol(.), 1)) %>%
    rename(well_column = 1) %>%
    inner_join(., new_combined) %>% 
    filter(assay == "AFP") %>% 
    group_by(xponent_id) %>% 
    mutate(cv = 100*sd(median_mfi)/mean(median_mfi)) %>% 
    ungroup()
  
  tmp_columns <- temp_df %>% 
    select(well_column) %>% 
    distinct(well_column)
  
  tmp_model <- fit_5pl(df = temp_df)
  conv_log <- broom::glance(tmp_model) %>%
              select(isConv) %>%
              pull()
  
  #### Here we add RCs to the sampled_df 
  rc <- rc_calculated %>% 
    filter(assay == "AFP") %>%
    inner_join(tmp_columns)
  
  temp_df <- bind_rows(temp_df, rc)

  ## this will have backcalculated std concentrations and RCs 
  augmented_df <- broom::augment(tmp_model, newdata = temp_df) %>% 
    mutate(recov_sim = 100*(.fitted)/standard_expected_concentration, 
           num_reps_used = rep(2, nrow(.)), 
           conv = rep(as.character(conv_log), nrow(.)))
  augmented_df %>% 
    filter(sample_type == "sample") %>% 
    ggplot(.,aes(calc_conc, nls_conc)) + 
    geom_point()
  
  augmented_df %>% 
    filter(sample_type == "standard") %>% 
    ggplot(.,aes(recov_sim, pct_recovery)) + 
    geom_point()   
  
```



2. Function to create what we need  

```{r}
iterate_model <- function(replicates = NULL,
                          protein = "NULL") {
  
  sampled_df <- combn(24, replicates) %>%
    as.matrix() %>%
    data.frame() %>%
    select(sample(1:ncol(.), 1)) %>%
    rename(well_column = 1) %>%
    inner_join(., new_combined) %>% 
    filter(assay == as.character(protein)) %>% 
    group_by(xponent_id) %>% 
    mutate(cv = 100*sd(median_mfi)/mean(median_mfi)) %>% 
    ungroup()
  
  well_columns <- sampled_df %>% 
    select(well_column) %>% 
    distinct(well_column)
  
  sampled_model <- fit_5pl(df = sampled_df)
  conv_log <- broom::glance(sampled_model) %>%
              select(isConv) %>%
              pull()
  
  #### Here we add RCs to the sampled_df 
  rc <- rc_calculated %>% 
    filter(assay == as.character(protein)) %>%
    inner_join(well_columns)
  
  sampled_df <- bind_rows(sampled_df, rc)

  ## this will have backcalculated std concentrations and RCs 
  augmented_df <- broom::augment(sampled_model, newdata = sampled_df) %>% 
    mutate(recov_sim = 100*(.fitted)/standard_expected_concentration, 
           num_reps_used = rep(replicates, nrow(.)), 
           conv = rep(as.character(conv_log), nrow(.)))
  
  return(augmented_df)
  
  
}
```



Now we need a way of iterating over all proteins 

```{r}
list_of_proteins <- combined_standards %>% select(assay) %>% 
  distinct() %>% 
  pull()
```


```{r message=FALSE}
reps_2 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 2, 
                                                   protein = .x)), 
                .id = "sim_number")

reps_3 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 3, 
                                                   protein = .x)), 
                .id = "sim_number")

reps_4 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 4, 
                                                   protein = .x)), 
                .id = "sim_number")

reps_5 <- imap_dfr(1:20, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model(replicates = 5, 
                                                   protein = .x)), 
                .id = "sim_number")
```



Bind these dataframes   

```{r}
sim_combined <- bind_rows(reps_2, reps_3, reps_4, reps_5)

sim_combined <- sim_combined %>% 
  mutate(nls_recov = 100*nls_conc/standard_expected_concentration)

reps_3 %>% 
  filter(assay == "AFP") %>% 
  filter(xponent_id == "Standard1")
```


Distribution of CVs  

```{r}
sim_combined %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(cv, fill = num_reps_used)) + 
  geom_histogram(alpha = 0.5) + 
  theme_classic() + 
  labs(title = "Distribution of CVs") + 
  scale_color_manual(values = freenome_colors[1:4])
```
#### Recovery Absolute  

```{r}
sim_recov_plot <- function(std_point = "NULL") {
  sim_combined %>% 
  filter(xponent_id == std_point) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(num_reps_used, recov_sim, group = num_reps_used)) + 
  geom_point() + 
  geom_boxplot(outlier.shape = NA) + 
  facet_wrap(vars(assay)) + 
  theme_classic() + 
  labs(title = paste0("Percent Recovery For ", std_point), 
       subtitle = "Recoveries from simulations")
}
```


```{r}
sim_combined %>% 
  filter(sample_type == "standard") %>% 
  ggplot(.,aes(xponent_id, nls_recov)) + 
  geom_point() + 
  geom_boxplot(outlier.shape = NA) + 
  facet_wrap(vars(assay)) + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


sim_combined %>% 
  filter(xponent_id == "Standard2") %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(num_reps_used, nls_recov, group = num_reps_used)) + 
  geom_point() + 
  geom_point(aes(y = pct_recovery), color = "red", alpha = 0.5) + 
  geom_boxplot(outlier.shape = NA) + 
  facet_wrap(vars(assay))  

list_of_standards <- c("Standard1", "Standard2", "Standard3", "Standard4", 
                       "Standard5", "Standard6") 

map(list_of_standards, ~ sim_recov_plot(std_point = .x))
```


#### Distribution of the differences in Recovery    



```{r}
sim_combined <- sim_combined %>% 
  mutate(recov_diff = abs(recov_sim - nls_recov))

sim_recov_diff_plot <- function(std_point = "NULL") {
  sim_combined %>% 
  filter(xponent_id == std_point) %>% 
  filter(assay != "CA15-3") %>% 
    filter(assay != "MIF") %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(num_reps_used, recov_diff, group = num_reps_used)) + 
  geom_point() + 
  geom_boxplot(outlier.shape = NA) + 
  facet_wrap(vars(assay)) + 
  theme_classic() + 
  labs(title = paste0("Percent Recovery Difference For ", std_point), 
       subtitle = "")
}

sim_combined %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(num_reps_used, recov_diff, group = num_reps_used)) + 
  geom_point(size = 0.25) + 
  geom_boxplot(outlier.shape = NA) +
  facet_wrap(vars(assay)) + 
  theme_classic()

map(list_of_standards, ~ sim_recov_diff_plot(std_point = .x))
```




#### Run Controls   

```{r}
sim_combined %>% 
  filter(sample_type == "sample") %>% 
  filter(num_reps_used == 2) %>% 
  #mutate(abs_diff = abs(calc_conc - nls_conc)) %>% 
  ggplot(.,aes(calc_conc + 1, nls_conc + 1)) + 
  geom_point() + 
  scale_x_log10() + scale_y_log10() + 
  facet_wrap(vars(assay))

sim_combined %>% 
  filter(sample_type == "sample") %>% 
  filter(num_reps_used == 3) %>% 
  #mutate(abs_diff = abs(calc_conc - nls_conc)) %>% 
  ggplot(.,aes(calc_conc + 1, nls_conc + 1)) + 
  geom_point() + 
  scale_x_log10() + scale_y_log10() + 
  facet_wrap(vars(assay))

sim_combined %>% 
  filter(sample_type == "sample") %>% 
  filter(num_reps_used == 4) %>% 
  #mutate(abs_diff = abs(calc_conc - nls_conc)) %>% 
  ggplot(.,aes(calc_conc + 1, nls_conc + 1)) + 
  geom_point() + 
  scale_x_log10() + scale_y_log10() + 
  facet_wrap(vars(assay))

sim_combined %>% 
  filter(sample_type == "sample") %>% 
  filter(num_reps_used == 5) %>% 
  #mutate(abs_diff = abs(calc_conc - nls_conc)) %>% 
  ggplot(.,aes(calc_conc + 1, nls_conc + 1)) + 
  geom_point() + 
  scale_x_log10() + scale_y_log10() + 
  facet_wrap(vars(assay))



rc_plot_one <- function(protein = "NULL") {
  sim_combined %>% 
  filter(sample_type == "sample") %>% 
  filter(assay == protein) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(xponent_id, log(.fitted +1), group = num_reps_used, 
               color = num_reps_used)) + 
  geom_point(size = 1.5, position = position_dodge(width = 0.5)) + 
  geom_point(aes(xponent_id, log(nls_conc + 1)), color = "blue", size = 0.5) + 
  theme_classic() + 
  labs(title = paste0("Run Controls For ", protein), 
       subtitle = "Dark Blue Points are fitted values using all 24 standards")
}

#### we need a data set where the RCs are mean of 24 replicates  
rc_avg <- rc_calculated %>% 
  filter(sample_type == "sample") %>% 
  group_by(assay, xponent_id) %>% 
  summarise(nls_conc = mean(nls_conc))

rc_plot_avg <- function(protein = "NULL") {
  sim_combined %>% 
  filter(sample_type == "sample") %>% 
  group_by(xponent_id, sim_number, assay, num_reps_used) %>%
  summarise(.fitted = mean(.fitted)) %>% 
  ungroup() %>% 
  filter(assay == protein) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(xponent_id, log(.fitted +1), 
               color = num_reps_used)) + 
  geom_point(size = 1.5, position = position_dodge(width = 0.5)) + 
  geom_point(data = rc_avg %>% filter(assay == protein), aes(xponent_id, log(nls_conc + 1)), 
             color = "blue", size = 1) + 
  theme_classic() + 
  labs(title = paste0("Run Controls For ", protein), 
       subtitle = "Dark Blue Points are fitted values using all 24 standards")
}


map(list_of_proteins, ~ rc_plot_avg(protein = .x))
```












### Delete a standard   

#### Replicates 4    




#### Replicates 3   





1. For each protein, fit a model using all 24 standards and fix it  
2. Then for each protein, sample a set of RCs and predict their concentrations based on the model.   
3. 





### Fix standard, change number of replicates in the RCs   

```{r}
models <- map(list_of_proteins,  ~ fit_5pl(df = combined_standards %>% filter(assay == .x)))
names(models) <- list_of_proteins
models %>%
  purrr::pluck("AFP")

```


```{r}
iterate_model_rc <- function(replicates = NULL,
                          protein = "NULL") {
  
  sampled_df <- combn(24, replicates) %>%
    as.matrix() %>%
    data.frame() %>%
    select(sample(1:ncol(.), 1)) %>%
    rename(well_column = 1) %>%
    inner_join(., new_combined) %>% 
    filter(assay == as.character(protein)) %>% 
    group_by(xponent_id) %>% 
    mutate(cv = 100*sd(median_mfi)/mean(median_mfi)) %>% 
    ungroup()
  
  well_columns <- sampled_df %>% 
    select(well_column) %>% 
    distinct(well_column)
  
   #fit_mod <- fit_5pl(df = standard_df)   
   #standard_df <- broom::augment(fit_mod, standard_df) %>% 
    #rename(nls_conc = .fitted)  
  
   
   rc <- rc_calculated %>% 
    filter(assay == as.character(protein)) %>%
    inner_join(well_columns)
  
  #sampled_df <- bind_rows(sampled_df, rc)

  ## this will have backcalculated Cs 
  fit_mod <- models %>% pluck(protein) 
  augmented_df <- broom::augment(fit_mod, newdata = rc) %>% 
    mutate(num_reps_used = rep(replicates, nrow(.)))
  
  return(augmented_df)
}

```





```{r message = FALSE, warning = FALSE}
rc_reps_2 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 2, 
                                                   protein = .x)), 
                .id = "sim_number")

rc_reps_3 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 3, 
                                                   protein = .x)), 
                .id = "sim_number")

rc_reps_4 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 4, 
                                                   protein = .x)), 
                .id = "sim_number")

rc_reps_5 <- imap_dfr(1:100, ~map_dfr(list_of_proteins, ~ 
                                     iterate_model_rc(replicates = 5, 
                                                   protein = .x)), 
                .id = "sim_number")
```


```{r}
rc_sim_combined <- bind_rows(rc_reps_2, rc_reps_3, rc_reps_4, rc_reps_5)  
```


```{r}
rc_sim_combined
```


```{r}
rc_avg <- rc_calculated %>% 
  filter(sample_type == "sample") %>% 
  group_by(assay, xponent_id) %>% 
  summarise(nls_conc = mean(nls_conc))

rc_plot_avg <- function(protein = "NULL") {
  rc_sim_combined %>% 
  filter(sample_type == "sample") %>% 
  group_by(xponent_id, sim_number, assay, num_reps_used) %>%
  summarise(.fitted = mean(.fitted)) %>% 
  ungroup() %>% 
  filter(assay == protein) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(xponent_id, log(.fitted +1), 
               color = num_reps_used)) + 
  geom_point(size = 1.5, position = position_dodge(width = 0.5)) + 
  geom_point(data = rc_avg %>% filter(assay == protein), aes(xponent_id, log(nls_conc + 1)), 
             color = "blue", size = 1) + 
  theme_classic() + 
  labs(title = paste0("Run Controls For ", protein), 
       subtitle = "Dark Blue Points are fitted values using all 24 standards")
}


map(list_of_proteins, ~ rc_plot_avg(protein = .x))
```


```{r}
rc_cv_plot <- function(protein = "NULL") {
  rc_sim_combined %>% 
  group_by(assay, num_reps_used, xponent_id) %>% 
  summarise(avg = mean(log(.fitted + 1), na.rm = TRUE), 
            std_dev = sd(log(.fitted + 1), na.rm = TRUE), 
            cv = 100*(std_dev)/avg) %>% 
  mutate(xponent_id = fct_reorder(xponent_id, cv)) %>% 
  ungroup() %>% 
  filter(assay == protein) %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(xponent_id, cv)) + 
  geom_point(aes(color = num_reps_used)) + 
    labs(title = paste0(protein)) + 
    theme_classic() 
}
map(list_of_proteins, ~ rc_cv_plot(protein = .x)) 




  rc_sim_combined %>% 
  group_by(assay, num_reps_used, xponent_id) %>% 
  summarise(avg = mean(log(.fitted + 1), na.rm = TRUE), 
            std_dev = sd(log(.fitted + 1), na.rm = TRUE), 
            cv = 100*(std_dev)/avg) %>% 
  mutate(assay = fct_reorder(assay, cv)) %>% 
  ungroup() %>% 
  mutate(num_reps_used = as.character(num_reps_used)) %>% 
  ggplot(.,aes(assay, cv)) + 
  geom_point(aes(color = num_reps_used), position = position_dodge(width = 1)) + 
  labs(title = "") + 
  theme_classic() + 
  facet_wrap(vars(xponent_id))
  
  
```






















